library(tidyverse)
## -- Attaching packages ----------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts -------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
# Load the pre-processed crash datasets saved earlier as feather files.
# NOTE(review): table contents inferred from names only — drivers,
# incidents, non-motorists, and a combined table; confirm against the
# data-prep script that wrote these files.
drivers <- feather::read_feather("drivers.feather")
incidents <- feather::read_feather("incidents.feather")
non_motorists <- feather::read_feather("non_motorists.feather")
combo <- feather::read_feather("combo.feather")
# Coordinates of incidents inside the county; out-of-county rows are
# dropped so stray geocodes don't distort the cluster centroids.
incident_locations <- incidents %>%
  filter(!not_in_county) %>%
  select(longitude, latitude)

# kmeans() starts are random — fix the RNG so the clusters (and every
# plot derived from kmeans_list below) are reproducible across runs.
set.seed(2020)

# Fit k-means for k = 1..15 to support an elbow plot.
kmeans_list <- lapply(
  seq_len(15),
  function(k) kmeans(incident_locations, centers = k, nstart = 25, iter.max = 100)
)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 2852800)
# Elbow-plot data: total within-cluster sum of squares for each k.
# Iterate over kmeans_list directly (vapply is type-stable and adapts
# if the number of fitted models ever changes) instead of indexing 1:15.
centroidss <- data.frame(
  centroids = seq_along(kmeans_list),
  sum_of_squares = vapply(kmeans_list, function(m) m$tot.withinss, numeric(1))
)
# The "elbow" in this curve suggests a reasonable number of clusters.
centroidss %>%
  ggplot(aes(x = centroids, y = sum_of_squares)) +
  geom_line()

# Tabular view of the elbow-plot data.
centroidss %>% knitr::kable()
## centroids sum_of_squares
## 1 805.93049
## 2 304.84448
## 3 213.07526
## 4 172.06474
## 5 139.81327
## 6 117.46724
## 7 99.63681
## 8 87.33599
## 9 75.62231
## 10 66.97816
## 11 60.93547
## 12 55.44351
## 13 50.97869
## 14 47.61560
## 15 44.16663
# One scatter map per clustering solution, points colored by cluster
# assignment. seq_along() keeps this in sync with however many models
# were actually fitted, instead of a hard-coded 1:15.
incident_clusters <- incident_locations
for (k in seq_along(kmeans_list)) {
  incident_clusters$cluster <-
    as.factor(paste0("cluster", kmeans_list[[k]]$cluster))
  cluster_plot <- incident_clusters %>%
    ggplot(aes(x = longitude, y = latitude, color = cluster)) +
    geom_point()
  # Explicit print is required to render ggplot objects inside a loop.
  print(cluster_plot)
}

# Plot a two-way contingency table of var1 by var2 as a heatmap.
#
# data: data frame containing both columns.
# var1, var2: unquoted column names (tidy evaluation); var1 on x, var2 on y.
#
# Cells are filled by log(n) so a few very large cells don't wash out the
# palette, while the raw count n is printed in each cell.
contingency_table <- function(data, var1, var2) {
  # Capture the column expressions once instead of re-quoting them at
  # every use site below.
  var1 <- enquo(var1)
  var2 <- enquo(var2)
  data %>%
    group_by(!!var1, !!var2) %>%
    tally() %>%
    ggplot() +
    geom_bin2d(aes(x = !!var1, fill = log(n), y = !!var2)) +
    geom_text(aes(x = !!var1, label = n, y = !!var2)) +
    scale_fill_gradient(low = "#ece7f2",
                        high = "#a6bddb") +
    theme(
      panel.grid = element_blank(),
      legend.position = "none",
      panel.background = element_rect(fill = "white")
    )
}
# Chi-squared test of independence: weather conditions vs. report type.
weather_chisq <- chisq.test(
  as.factor(incidents$weather),
  as.factor(incidents$acrs_report_type)
)
## Warning: Chi-squared approximation may be incorrect
weather_chisq %>% broom::tidy() %>% knitr::kable()
## statistic p.value parameter method
## 171.3451 0 24 Pearson’s Chi-squared test

## p-value: 3.2496967e-24

# Heatmap of report type by weather condition counts.
contingency_table(incidents, acrs_report_type, weather)

# Helmets ----

# Bicyclist injuries by helmet use: collapse the free-text safety
# equipment field to a two-level helmet / no-helmet indicator. if_else()
# replaces the original c(...)[1 + as.integer(...)] indexing trick;
# behavior is identical, including NA -> NA.
helmet_data <- non_motorists %>%
  filter(as.logical(pedestrian_type_bicyclist)) %>%
  select(safety_equipment, injury_severity) %>%
  mutate(
    safety_equipment = if_else(
      str_detect(tolower(safety_equipment), "helmet"),
      "helmet",
      "no helmet"
    )
  )

# Chi-squared test of independence: helmet use vs. injury severity.
helmet_chisq <- chisq.test(
  as.factor(helmet_data$safety_equipment),
  as.factor(helmet_data$injury_severity)
)
## Warning: Chi-squared approximation may be incorrect
helmet_chisq %>% broom::tidy() %>% knitr::kable()
## statistic p.value parameter method
## 7.365619 0.117783 4 Pearson’s Chi-squared test
# Heatmap of helmet use vs. injury severity counts.
contingency_table(helmet_data, safety_equipment,injury_severity)

# Speed Limits ----

# Injury severity levels ordered from most to least frequent, so the
# factor below sorts categories by prevalence.
ordered_injury <- drivers %>%
  count(injury_severity) %>%
  arrange(desc(n)) %>%
  pull(injury_severity)

# Bucket speed limits into three bands via findInterval: below 31 ->
# "Under 35", 31-45 -> "35 - 45", 46+ -> "50+".
# NOTE(review): the 31/46 cut points presumably work because posted
# limits are multiples of 5 — confirm no limits fall in 31-34 or 46-49.
speed_bands <- c("Under 35", "35 - 45", "50+")
speed_limit_data <- drivers %>%
  select(speed_limit, injury_severity) %>%
  mutate(
    speed_limit = factor(
      speed_bands[1 + findInterval(speed_limit, c(31, 46))],
      levels = speed_bands
    ),
    injury_severity = factor(injury_severity, ordered_injury)
  )

# Chi-squared test of independence: speed-limit band vs. injury severity.
speed_chisq <- chisq.test(
  as.factor(speed_limit_data$speed_limit),
  as.factor(speed_limit_data$injury_severity)
)
## Warning: Chi-squared approximation may be incorrect
speed_chisq %>% broom::tidy() %>% knitr::kable()
## statistic p.value parameter method
## 995.2863 0 8 Pearson’s Chi-squared test

## p-value: 1.5543378e-209

# Heatmap of speed-limit band vs. injury severity counts.
contingency_table(speed_limit_data, speed_limit, injury_severity)

library(prophet)
# MCMC samples per chain; a value > 0 makes prophet do a full Bayesian
# fit via Stan instead of MAP estimation.
mcmc_samples <- 1500

# Daily incident counts for prophet (columns ds/y), padded so every
# calendar day from 2015-01-01 through the last observed date is
# present with y = 0 on crash-free days. tidyr::complete() replaces the
# previous append-zero-rows-then-group-and-sum workaround.
temp_data <- incidents %>%
  count(incident_date) %>%
  rename(ds = incident_date, y = n) %>%
  complete(
    ds = seq(as.Date("2015-01-01"), max(ds), by = "d"),
    fill = list(y = 0)
  )

# Full-Bayesian prophet model of daily incident volume. Built unfitted
# (fit = FALSE) so US holiday effects can be attached before fitting.
total_model <- prophet(mcmc.samples = mcmc_samples, fit = FALSE) %>%
  add_country_holidays("US") %>%
  fit.prophet(df = temp_data)
## 
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0.003 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 30 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 1500 [  0%]  (Warmup)
## Chain 1: Iteration:  150 / 1500 [ 10%]  (Warmup)
## Chain 1: Iteration:  300 / 1500 [ 20%]  (Warmup)
## Chain 1: Iteration:  450 / 1500 [ 30%]  (Warmup)
## Chain 1: Iteration:  600 / 1500 [ 40%]  (Warmup)
## Chain 1: Iteration:  750 / 1500 [ 50%]  (Warmup)
## Chain 1: Iteration:  751 / 1500 [ 50%]  (Sampling)
## Chain 1: Iteration:  900 / 1500 [ 60%]  (Sampling)
## Chain 1: Iteration: 1050 / 1500 [ 70%]  (Sampling)
## Chain 1: Iteration: 1200 / 1500 [ 80%]  (Sampling)
## Chain 1: Iteration: 1350 / 1500 [ 90%]  (Sampling)
## Chain 1: Iteration: 1500 / 1500 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 143.802 seconds (Warm-up)
## Chain 1:                164.334 seconds (Sampling)
## Chain 1:                308.136 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 0 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 1500 [  0%]  (Warmup)
## Chain 2: Iteration:  150 / 1500 [ 10%]  (Warmup)
## Chain 2: Iteration:  300 / 1500 [ 20%]  (Warmup)
## Chain 2: Iteration:  450 / 1500 [ 30%]  (Warmup)
## Chain 2: Iteration:  600 / 1500 [ 40%]  (Warmup)
## Chain 2: Iteration:  750 / 1500 [ 50%]  (Warmup)
## Chain 2: Iteration:  751 / 1500 [ 50%]  (Sampling)
## Chain 2: Iteration:  900 / 1500 [ 60%]  (Sampling)
## Chain 2: Iteration: 1050 / 1500 [ 70%]  (Sampling)
## Chain 2: Iteration: 1200 / 1500 [ 80%]  (Sampling)
## Chain 2: Iteration: 1350 / 1500 [ 90%]  (Sampling)
## Chain 2: Iteration: 1500 / 1500 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 134.956 seconds (Warm-up)
## Chain 2:                140.744 seconds (Sampling)
## Chain 2:                275.7 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 0.001 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 10 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 1500 [  0%]  (Warmup)
## Chain 3: Iteration:  150 / 1500 [ 10%]  (Warmup)
## Chain 3: Iteration:  300 / 1500 [ 20%]  (Warmup)
## Chain 3: Iteration:  450 / 1500 [ 30%]  (Warmup)
## Chain 3: Iteration:  600 / 1500 [ 40%]  (Warmup)
## Chain 3: Iteration:  750 / 1500 [ 50%]  (Warmup)
## Chain 3: Iteration:  751 / 1500 [ 50%]  (Sampling)
## Chain 3: Iteration:  900 / 1500 [ 60%]  (Sampling)
## Chain 3: Iteration: 1050 / 1500 [ 70%]  (Sampling)
## Chain 3: Iteration: 1200 / 1500 [ 80%]  (Sampling)
## Chain 3: Iteration: 1350 / 1500 [ 90%]  (Sampling)
## Chain 3: Iteration: 1500 / 1500 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 164.024 seconds (Warm-up)
## Chain 3:                160.649 seconds (Sampling)
## Chain 3:                324.673 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 0.001 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 10 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 1500 [  0%]  (Warmup)
## Chain 4: Iteration:  150 / 1500 [ 10%]  (Warmup)
## Chain 4: Iteration:  300 / 1500 [ 20%]  (Warmup)
## Chain 4: Iteration:  450 / 1500 [ 30%]  (Warmup)
## Chain 4: Iteration:  600 / 1500 [ 40%]  (Warmup)
## Chain 4: Iteration:  750 / 1500 [ 50%]  (Warmup)
## Chain 4: Iteration:  751 / 1500 [ 50%]  (Sampling)
## Chain 4: Iteration:  900 / 1500 [ 60%]  (Sampling)
## Chain 4: Iteration: 1050 / 1500 [ 70%]  (Sampling)
## Chain 4: Iteration: 1200 / 1500 [ 80%]  (Sampling)
## Chain 4: Iteration: 1350 / 1500 [ 90%]  (Sampling)
## Chain 4: Iteration: 1500 / 1500 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 149.079 seconds (Warm-up)
## Chain 4:                116.884 seconds (Sampling)
## Chain 4:                265.963 seconds (Total)
## Chain 4:
# Forecast one year past the training window and visualize the fit.
total_future <- make_future_dataframe(total_model, periods = 365)
total_forecast <- predict(total_model, total_future)
plot(total_model, total_forecast)

# Trend / holiday / weekly / yearly components of the fitted model.
prophet_plot_components(total_model, total_forecast)

# Interactive (dygraphs) version of the forecast plot.
dyplot.prophet(total_model, total_forecast)
## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo
# Daily counts of alcohol-involved incidents, zero-padded over the full
# calendar via tidyr::complete() (replaces the append-zero-rows-then-sum
# workaround).
temp_data <- incidents %>%
  filter(driver_substance_abuse_alcohol_present |
           driver_substance_abuse_alcohol_contributed) %>%
  count(incident_date) %>%
  rename(ds = incident_date, y = n) %>%
  complete(
    ds = seq(as.Date("2015-01-01"), max(ds), by = "d"),
    fill = list(y = 0)
  )

# Build the model unfitted (fit = FALSE) so add_country_holidays() is
# applied before fitting, matching how total_model is constructed.
# (prophet() with no df never fits, so this only makes intent explicit.)
alcohol_model <- prophet(mcmc.samples = mcmc_samples, fit = FALSE) %>%
  add_country_holidays("US") %>%
  fit.prophet(df = temp_data)
## 
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 1500 [  0%]  (Warmup)
## Chain 1: Iteration:  150 / 1500 [ 10%]  (Warmup)
## Chain 1: Iteration:  300 / 1500 [ 20%]  (Warmup)
## Chain 1: Iteration:  450 / 1500 [ 30%]  (Warmup)
## Chain 1: Iteration:  600 / 1500 [ 40%]  (Warmup)
## Chain 1: Iteration:  750 / 1500 [ 50%]  (Warmup)
## Chain 1: Iteration:  751 / 1500 [ 50%]  (Sampling)
## Chain 1: Iteration:  900 / 1500 [ 60%]  (Sampling)
## Chain 1: Iteration: 1050 / 1500 [ 70%]  (Sampling)
## Chain 1: Iteration: 1200 / 1500 [ 80%]  (Sampling)
## Chain 1: Iteration: 1350 / 1500 [ 90%]  (Sampling)
## Chain 1: Iteration: 1500 / 1500 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 125.181 seconds (Warm-up)
## Chain 1:                102.083 seconds (Sampling)
## Chain 1:                227.264 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 0.001 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 10 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 1500 [  0%]  (Warmup)
## Chain 2: Iteration:  150 / 1500 [ 10%]  (Warmup)
## Chain 2: Iteration:  300 / 1500 [ 20%]  (Warmup)
## Chain 2: Iteration:  450 / 1500 [ 30%]  (Warmup)
## Chain 2: Iteration:  600 / 1500 [ 40%]  (Warmup)
## Chain 2: Iteration:  750 / 1500 [ 50%]  (Warmup)
## Chain 2: Iteration:  751 / 1500 [ 50%]  (Sampling)
## Chain 2: Iteration:  900 / 1500 [ 60%]  (Sampling)
## Chain 2: Iteration: 1050 / 1500 [ 70%]  (Sampling)
## Chain 2: Iteration: 1200 / 1500 [ 80%]  (Sampling)
## Chain 2: Iteration: 1350 / 1500 [ 90%]  (Sampling)
## Chain 2: Iteration: 1500 / 1500 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 131.418 seconds (Warm-up)
## Chain 2:                90.105 seconds (Sampling)
## Chain 2:                221.523 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 0.001 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 10 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 1500 [  0%]  (Warmup)
## Chain 3: Iteration:  150 / 1500 [ 10%]  (Warmup)
## Chain 3: Iteration:  300 / 1500 [ 20%]  (Warmup)
## Chain 3: Iteration:  450 / 1500 [ 30%]  (Warmup)
## Chain 3: Iteration:  600 / 1500 [ 40%]  (Warmup)
## Chain 3: Iteration:  750 / 1500 [ 50%]  (Warmup)
## Chain 3: Iteration:  751 / 1500 [ 50%]  (Sampling)
## Chain 3: Iteration:  900 / 1500 [ 60%]  (Sampling)
## Chain 3: Iteration: 1050 / 1500 [ 70%]  (Sampling)
## Chain 3: Iteration: 1200 / 1500 [ 80%]  (Sampling)
## Chain 3: Iteration: 1350 / 1500 [ 90%]  (Sampling)
## Chain 3: Iteration: 1500 / 1500 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 132.48 seconds (Warm-up)
## Chain 3:                95.794 seconds (Sampling)
## Chain 3:                228.274 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 0.001 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 10 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 1500 [  0%]  (Warmup)
## Chain 4: Iteration:  150 / 1500 [ 10%]  (Warmup)
## Chain 4: Iteration:  300 / 1500 [ 20%]  (Warmup)
## Chain 4: Iteration:  450 / 1500 [ 30%]  (Warmup)
## Chain 4: Iteration:  600 / 1500 [ 40%]  (Warmup)
## Chain 4: Iteration:  750 / 1500 [ 50%]  (Warmup)
## Chain 4: Iteration:  751 / 1500 [ 50%]  (Sampling)
## Chain 4: Iteration:  900 / 1500 [ 60%]  (Sampling)
## Chain 4: Iteration: 1050 / 1500 [ 70%]  (Sampling)
## Chain 4: Iteration: 1200 / 1500 [ 80%]  (Sampling)
## Chain 4: Iteration: 1350 / 1500 [ 90%]  (Sampling)
## Chain 4: Iteration: 1500 / 1500 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 132.758 seconds (Warm-up)
## Chain 4:                97.425 seconds (Sampling)
## Chain 4:                230.183 seconds (Total)
## Chain 4:
# One-year forecast of alcohol-involved incidents plus diagnostics.
alcohol_future <- make_future_dataframe(alcohol_model, periods = 365)
alcohol_forecast <- predict(alcohol_model, alcohol_future)
plot(alcohol_model, alcohol_forecast)

# Trend / holiday / weekly / yearly components of the fitted model.
prophet_plot_components(alcohol_model, alcohol_forecast)

# Interactive (dygraphs) version of the forecast plot.
dyplot.prophet(alcohol_model, alcohol_forecast)